
 /* This program estimates the DiD models presented in Hoen, Brunner, and Schwegman, "An analysis of home price premiums in school districts with high levels of wind energy development." The models are estimated using a stacked difference-in-differences approach to deal with heterogeneous treatment effects with staggered timing of treatment.  */
 
 
log using results, replace

	global Data "R:\CLAS_Brunner\District_Capitalization\Data"
	global Results "R:\CLAS_Brunner\District_Capitalization\Results"
	global Index "R:\CLAS_Brunner\District_Capitalization\Data\Housing Index"
	
	
	* Remove existing files 
	shell erase "$Results\*.txt"
	shell erase "$Results\*.xls"
	shell erase "$Results\*.png"
	shell erase "$Results\*.tif"
	shell erase "$Results\*.gph"
	
* This is the main dataset which is created from the program "Stack Data.do"
use "$Data\stacked_data_aug23" 


/* Next lines of code are for comparing different size donut holes but keeping the identifying districts the 
same as in the 2 mile donut hole. */
/*
drop adj_sale_price ln_adj_price yearbuilt2 livingsquarefeetallbuildings2 fullbathsallbuildings2 bedroomsallbuildings2 garagecode2X transaction

* merge m:1 ncesid year using "$Index\school_year_collapse_over1_half_updated"
* merge m:1 ncesid year using "$Index\school_year_collapse_over1_updated"
merge m:1 ncesid year using "$Index\school_year_collapse_updated"

drop if _merge==2
drop _merge
*/



* Create housing characteristics based on first year of data
* Find Enrollment at First Turbine Installation
	drop mobs 
	sort ncesid year
	by ncesid: gen obs=_n if livingsquarefeetallbuildings2~=.
	egen mobs=min(obs), by(ncesid)
	gen temp2=livingsquarefeetallbuildings2 if obs==mobs
	egen livarea=max(temp2), by(ncesid)
	drop mobs temp* obs
	
	sort ncesid year
	by ncesid: gen obs=_n if fullbathsallbuildings2~=.
	egen mobs=min(obs), by(ncesid)
	gen temp2=fullbathsallbuildings2 if obs==mobs
	egen baths=max(temp2), by(ncesid)
	drop mobs temp* obs
	
	sort ncesid year
	by ncesid: gen obs=_n if yearbuilt2~=.
	egen mobs=min(obs), by(ncesid)
	gen temp2=yearbuilt2 if obs==mobs
	egen ybuilt=max(temp2), by(ncesid)
	drop mobs temp* obs
	
	
* Create Control Variables
gen trend=year-2004

gen livarea_trend=livarea*trend
gen bath_trend=baths*trend
gen ybuilt_trend=ybuilt*trend

gen inc_trend=medhhinc_00*trend
gen col_trend=pcoll_00*trend
gen enrl_trend=enrl_fy*trend
gen elem_trend=elem*trend
gen pop55_trend=pop55_00*trend
gen nwhite=1-white_00
gen nwhite_trend=nwhite*trend
gen density_trend=pop_dens00*trend
gen rural_trend=rural*trend

gen cap_pp=cum_p_cap / denrl

drop if inc_trend==.
drop if col_trend==.



******************************************************

* Create indicator for states with no turbines
	gen state_no=0
	replace state_no=1 if fipst==1|fipst==5|fipst==11|fipst==12|fipst==13 ///
	|fipst==21|fipst==22|fipst==28|fipst==47|fipst==45|fipst==51

	
/* Sample Restrictions: Make base sample districts with a minimum enrollment of 
50 students, and drop states where there are no turbines since they do not contribute to idenfication */

egen menrl=min(denrl), by(ncesid)
drop if menrl<50
drop if state_no==1

********************************************************
	
* Sample Restrictions

* Make temporal window 4 years prior to turbine operation and up to 10 years after

	gen yrs_since=year-min_p_year
	gen treat1=1 if yrs_since~=.
    recode treat1 .=0
    replace yrs_since=0 if yrs_since==.

	gen tm6=1 if yrs_since<=-6 & yrs_since~=.
	recode tm6 .=0
	gen tp11=1 if yrs_since>=11
	recode tp11 .=0
	
	drop if tm6==1
	drop if tp11==1
	

* Drop small number of districts with a p_year but no capacity

egen maxcap=max(cum_p_cap), by(ncesid)
drop if maxcap==0 & min_p_year~=.
drop maxcap
	

***********************************************************************
* Create Binary Treatment Indicator
	gen treat=0
	replace treat=1 if year>=min_p_year & min_p_year~=.

	gen rprice=adj_sale_price*deflator
	gen lprice=log(rprice)
	
	egen maxcap=max(cap_pp), by(ncesid)
	replace maxcap=. if maxcap==0
	egen temp=min(cap_pp) if cap_pp>0 , by(ncesid)
	replace temp=0 if temp==.
	egen mincap=max(temp), by(ncesid)
	drop temp

	replace cap_pp=mcap_pp /* Use baseline enrollment */

* Create vector of control variable
	local hcontrols livarea_trend bath_trend 
	local controls col_trend inc_trend pop55_trend nwhite_trend rural_trend

* Stacked DiD Estimates for Tables 1 and 2
	
	reghdfe lprice treat [w=transaction], absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2_stacked.xls", excel ctitle(lprice nc) append

	reghdfe lprice treat `hcontrols' [w=transaction], absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2_stacked.xls", excel ctitle(lprice c1) append

	reghdfe lprice treat  `hcontrols' `controls' [w=transaction], absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2_stacked.xls", excel ctitle(lprice c2) append

	
	foreach y of varlist rlrev_pp rtrev_pp rtexp_pp rcexp_pp rtcapital_pp ptratio {
	reghdfe `y' treat, absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2_stacked.xls", excel ctitle(`y'nc) append
	}

	foreach y of varlist rlrev_pp rtrev_pp rtexp_pp rcexp_pp rtcapital_pp ptratio {
	reghdfe `y' treat `controls', absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2_stacked.xls", excel ctitle(`y') append
	}
	
	
* Using continuos capacity Tables 1 and 2

	reghdfe lprice cap_pp  [w=transaction], absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2c_stacked.xls", excel ctitle(lprice nc) append

	
	reghdfe lprice cap_pp  `hcontrols' [w=transaction], absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2c_stacked.xls", excel ctitle(lprice c1) append

	
	reghdfe lprice cap_pp `hcontrols' `controls' [w=transaction], absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2c_stacked.xls", excel ctitle(lprice c2) append
	
	
	foreach y of varlist rlrev_pp rtrev_pp rtexp_pp rcexp_pp rtcapital_pp ptratio {
	reghdfe `y' cap_pp, absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2c_stacked.xls", excel ctitle(`y'nc) append
	}

	foreach y of varlist rlrev_pp rtrev_pp rtexp_pp rcexp_pp rtcapital_pp ptratio {
	reghdfe `y' cap_pp `controls', absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table2c_stacked.xls", excel ctitle(`y') append
	}
	
***************************************************************


	* Figure 3: Heterogeneity by Capacity per pupil

	foreach y in lprice {
	preserve
	
	reghdfe `y' cap_pp  `hcontrols' `controls' [w=transaction], absorb(ncesid#cohort county_year#cohort) cluster(ncesid)

	_pctile cap_pp if cap_pp~=0, percentiles(5(5)95)
	forvalues p=1/19 {
		local p`p' = r(r`p')
		di "`p`p''"
	}
	forvalues p=1/19 {
		lincom cap_pp * `p`p''
		global coef_`p' = r(estimate)
		global se_lo_`p' = r(estimate) - 1.64*r(se)
		global se_hi_`p' = r(estimate) + 1.64*r(se)
	}
	clear
	set obs 19
	gen p = _n
	gen coef_`y' = .
	gen ci_lo_`y' = .
	gen ci_hi_`y' = .
	forvalues p=1/19 {
		replace coef_`y' = ${coef_`p'} if _n==`p'
		replace ci_lo_`y' = ${se_lo_`p'} if _n==`p'
		replace ci_hi_`y' = ${se_hi_`p'} if _n==`p'
	}
	twoway (scatter coef_`y' p, msymbol(circle) mcolor(edkblue) connect(d) lpattern(solid) lcolor(edkblue)) (line ci_lo_`y' p, lcolor(black) lpattern(dash)) ///
	(line ci_hi_`y' p, lcolor(black) lpattern(dash)), legend(order(1 2) label(1 "Point Estimate") label(2 "90% Confidence Interval")) ///
	xtitle(Installed Capacity Per Pupil) xlabel(1 "5th" 5 "25th" 10 "Median" 15 "75th" 19 "95th") xmtick(1(1)19) ytitle(Estimated Treatment Effects) `y3' graphregion(color(white)) bgcolor(white)
	
	graph export "fig_`y'.tif", replace
	restore
}

* Repeat for Fiscal Outcomes


	foreach y in rlrev_pp rtrev_pp  rtexp_pp rcexp_pp rtcapital_pp ptratio {
	preserve
	
	reghdfe `y' cap_pp  `controls' , absorb(ncesid#cohort county_year#cohort) cluster(ncesid)

	_pctile cap_pp if cap_pp~=0, percentiles(5(5)95)
	forvalues p=1/19 {
		local p`p' = r(r`p')
		di "`p`p''"
	}
	forvalues p=1/19 {
		lincom cap_pp * `p`p''
		global coef_`p' = r(estimate)
		global se_lo_`p' = r(estimate) - 1.64*r(se)
		global se_hi_`p' = r(estimate) + 1.64*r(se)
	}
	clear
	set obs 19
	gen p = _n
	gen coef_`y' = .
	gen ci_lo_`y' = .
	gen ci_hi_`y' = .
	forvalues p=1/19 {
		replace coef_`y' = ${coef_`p'} if _n==`p'
		replace ci_lo_`y' = ${se_lo_`p'} if _n==`p'
		replace ci_hi_`y' = ${se_hi_`p'} if _n==`p'
	}
	twoway (scatter coef_`y' p, msymbol(circle) mcolor(edkblue) connect(d) lpattern(solid) lcolor(edkblue)) (line ci_lo_`y' p, lcolor(black) lpattern(dash)) ///
	(line ci_hi_`y' p, lcolor(black) lpattern(dash)), legend(order(1 2) label(1 "Point Estimate") label(2 "90% Confidence Interval")) ///
	xtitle(Installed Capacity Per Pupil) xlabel(1 "5th" 5 "25th" 10 "Median" 15 "75th" 19 "95th") xmtick(1(1)19) ytitle(Estimated Treatment Effects) `y3' graphregion(color(white)) bgcolor(white)
	
	graph export "fig_`y'.tif", replace
	restore
	}

	
	*********************************************
	
* Table 3: Above versus below median capacity.
	
	reghdfe lprice treat  livarea_trend bath_trend col_trend inc_trend pop55_trend nwhite_trend rural_trend [w=transaction] if maxcap>20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(lprice above med) append
	
	reghdfe lprice treat  livarea_trend bath_trend col_trend inc_trend pop55_trend nwhite_trend rural_trend [w=transaction] if maxcap<20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(lprice below med) append
	
	reghdfe rlrev_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap>20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Local Rev. above med) append
	
	reghdfe rlrev_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend  if maxcap<20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Local Rev. below med) append
	
	reghdfe rtrev_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap>20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Total Rev. above med) append
	
	reghdfe rtrev_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap<20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Total Rev. below med) append
	
	reghdfe rtexp_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap>20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Total Exp. above med) append
	
	reghdfe rtexp_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap<20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Total Exp. below med) append
	
	reghdfe rcexp_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap>20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Current Exp. above med) append
	
	reghdfe rcexp_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap<20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Current Exp. below med) append
	
	reghdfe rtcapital_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap>20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Capital above med) append
	
	reghdfe rtcapital_pp treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap<20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Capital below med) append
	
	reghdfe ptratio treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap>20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Ptratio above med) append
	
	reghdfe ptratio treat col_trend inc_trend pop55_trend nwhite_trend rural_trend if maxcap<20 | maxcap==., absorb(ncesid#cohort county_year#cohort) cluster(ncesid)
	outreg2 using "$Results\table_split.xls", excel ctitle(Ptratio below med) append
	
	
	***********************************************************************************
	
	
/* Supplemental Material Falsification Tests: Do Falsification at 5 and 8 years prior since 5 years prior 
is during contruction when payments may have already started. */

gen p_yearf=min_p_year-5
gen treat=0
replace treat=1 if year>=p_yearf & min_p_year~=.
gen yrs_sincef=year-p_yearf
replace yrs_sincef=0 if yrs_sincef==.
by ncesid: gen lcap_pp=mcap_pp[_n+5]
replace lcap_pp=0 if lcap_pp==.
drop if year>=min_p_year & min_p_year~=.

gen tpf=1 if yrs_sincef>=5
gen tm6=1 if yrs_sincef<=-6 & yrs_sincef~=.

drop if tpf==1
drop if tm6==1

	
	reghdfe lprice lcap_pp `hcontrols' `controls' [w=transaction], absorb(ncesid#cohort county_year#cohort) cluster(ncesid)

	outreg2 using "$Results\table_false.xls", excel ctitle(lprice) append
	

foreach y of varlist rlrev_pp rtrev_pp  rtexp_pp rcexp_pp rtcapital_pp ptratio {
	reghdfe `y' lcap_pp `controls', absorb(ncesid#cohort county_year#cohort) cluster(ncesid)

	outreg2 using "$Results\table_false.xls", excel ctitle(`y') append
	}

	